Since these are larger (32x32x3) images, it may prove useful to speed up your training time by using a GPU. CUDA is a parallel computing platform, and CUDA tensors behave just like regular tensors, except that they use the GPU for computation.
In [1]:
import torch
import numpy as np
# Check if CUDA is available
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available. Training on CPU ...')
else:
    print('CUDA is available! Training on GPU ...')
In [2]:
# Forcing to train on the CPU
train_on_gpu = False
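For instance, moving data between devices uses the same tensor API either way (a minimal sketch, not part of the original notebook; on a CPU-only machine the device simply resolves to 'cpu'):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.ones(2, 2)  # an ordinary CPU tensor
x = x.to(device)      # becomes a CUDA tensor when a GPU is present
print(x.device)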
Downloading may take a minute. We load in the training and test data, split the training data into a training and validation set, then create DataLoaders for each of these sets of data.
In [3]:
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
# Number of subprocesses to use for data loading
num_workers = 0
# How many samples per batch to load
batch_size = 20
# Percentage of training set to use as validation
valid_size = 0.2
# Convert data to a normalized torch.FloatTensor
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5),
                         std=(0.5, 0.5, 0.5))
])
# Choose the training and test datasets
train_data = datasets.CIFAR10(root='data',
                              train=True,
                              download=True,
                              transform=transform)
test_data = datasets.CIFAR10(root='data',
                             train=False,
                             download=True,
                             transform=transform)
# Obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]
# Define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(indices=train_idx)
valid_sampler = SubsetRandomSampler(indices=valid_idx)
# Prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           sampler=train_sampler,
                                           num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           sampler=valid_sampler,
                                           num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=batch_size,
                                          num_workers=num_workers)
# Specify the image classes
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
In [4]:
import matplotlib.pyplot as plt
%matplotlib inline
# Helper function to un-normalize and display an image
def imshow(img):
    img = img / 2 + 0.5  # Unnormalize
    plt.imshow(np.transpose(img, (1, 2, 0)))  # Convert from Tensor image
In [5]:
# Obtain one batch of training images
dataiter = iter(train_loader)
images, labels = next(dataiter)  # dataiter.next() was removed in newer PyTorch
images = images.numpy()  # convert images to numpy for display
# plot the images in the batch, along with the corresponding labels
fig = plt.figure(figsize=(25, 4))
# Display 20 images in a 2x10 grid
for idx in np.arange(20):
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])  # 20/2 would be a float in Python 3
    imshow(images[idx])
    ax.set_title(classes[labels[idx]])
In [6]:
images[0].shape
Out[6]:
(3, 32, 32)
In [7]:
rgb_img = np.squeeze(images[3])
channels = ['red channel', 'green channel', 'blue channel']
fig = plt.figure(figsize=(36, 36))
for idx in np.arange(rgb_img.shape[0]):
    ax = fig.add_subplot(1, 3, idx + 1)
    img = rgb_img[idx]
    ax.imshow(img, cmap='gray')
    ax.set_title(channels[idx])
    width, height = img.shape
    thresh = img.max() / 2.5
    for x in range(width):
        for y in range(height):
            val = round(img[x][y], 2) if img[x][y] != 0 else 0
            ax.annotate(str(val), xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center', size=8,
                        color='white' if img[x][y] < thresh else 'black')
This time, you'll define a CNN architecture. Instead of an MLP, which used linear, fully-connected layers, you'll use the following:
A network with 2 convolutional layers is shown in the code below, and you've been given starter code with one convolutional and one max-pooling layer.
The more convolutional layers you include, the more complex patterns in color and shape a model can detect. It's suggested that your final model include 2 or 3 convolutional layers as well as linear layers + dropout in between to avoid overfitting.
It's good practice to look at existing research and implementations of related models as a starting point for defining your own models. You may find it useful to look at this PyTorch classification example or this more complex Keras example to help decide on a final structure.
To compute the output size of a given convolutional layer we can perform the following calculation (taken from Stanford's cs231n course):
We can compute the spatial size of the output volume as a function of the input volume size (W), the kernel/filter size (F), the stride with which the filter is applied (S), and the amount of zero padding used on the border (P). The number of neurons along one side of the output volume, output_W, is given by
(W − F + 2P)/S + 1.
For example, for a 7x7 input and a 3x3 filter with stride 1 and pad 0, we get a 5x5 output; with stride 2, we get a 3x3 output.
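To make the arithmetic concrete, here is a small helper (a sketch, not part of the original notebook; the name conv_output_size is illustrative) that evaluates the formula:

def conv_output_size(W, F, S=1, P=0):
    # (W - F + 2P)/S + 1, assuming the division is exact
    return (W - F + 2 * P) // S + 1

print(conv_output_size(7, 3, S=1, P=0))   # 5, matching the example above
print(conv_output_size(7, 3, S=2, P=0))   # 3
print(conv_output_size(32, 3, S=1, P=1))  # 32: a 3x3 kernel with padding 1 preserves size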
In [8]:
import torch.nn as nn
import torch.nn.functional as F
# Define the CNN architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional layers
        # Input 32x32x3
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=16,
                               kernel_size=(3, 3),
                               stride=(1, 1),
                               padding=1)
        # Input 16x16x16 (after pooling)
        self.conv2 = nn.Conv2d(in_channels=16,
                               out_channels=32,
                               kernel_size=(3, 3),
                               stride=(1, 1),
                               padding=1)
        # Input 8x8x32 (after pooling)
        self.conv3 = nn.Conv2d(in_channels=32,
                               out_channels=64,
                               kernel_size=(3, 3),
                               stride=(1, 1),
                               padding=1)
        # Max pooling layer
        self.pool = nn.MaxPool2d(kernel_size=(2, 2),
                                 stride=(2, 2))
        # Dropout
        self.dropout = nn.Dropout(p=0.25)
        # Linear layers
        # Input: flattened 4x4x64 = 1024, output 128
        self.fc1 = nn.Linear(in_features=4 * 4 * 64,
                             out_features=128)
        # Input 128, output 10 (one score per class)
        self.fc2 = nn.Linear(in_features=128,
                             out_features=10)

    def forward(self, x):
        # Add sequence of convolutional and max pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten the image
        x = x.view(-1, 64 * 4 * 4)
        # First fully-connected layer
        x = F.relu(self.fc1(x))
        # Add dropout
        x = self.dropout(x)
        # Second fully-connected layer (raw class scores)
        x = self.fc2(x)
        return x
# Create a complete CNN
model = Net()
print(model)
# Move tensors to GPU if CUDA is available
if train_on_gpu:
    model.cuda()
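As a quick sanity check on the shapes assumed by the flatten step (a sketch, not part of the original notebook; it assumes the model is on the CPU, as forced above), a dummy batch can be traced through the layers:

with torch.no_grad():
    dummy = torch.randn(1, 3, 32, 32)             # one fake CIFAR-10 image
    out = model.pool(F.relu(model.conv1(dummy)))  # -> [1, 16, 16, 16]
    out = model.pool(F.relu(model.conv2(out)))    # -> [1, 32, 8, 8]
    out = model.pool(F.relu(model.conv3(out)))    # -> [1, 64, 4, 4], i.e. 4x4x64 = 1024 features
    print(out.shape)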
Decide on a loss and optimization function that is best suited for this classification task. The code examples linked above may be a good starting point: this PyTorch classification example or this more complex Keras example. Pay close attention to the learning rate, as this value determines how quickly your model converges to a small error.
In [9]:
import torch.optim as optim
# Specify loss function
criterion = nn.CrossEntropyLoss()
# Specify optimizer
optimizer = optim.Adam(params=model.parameters(),
                       lr=0.01)
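Note that nn.CrossEntropyLoss combines log-softmax and negative log-likelihood in one step, which is why forward above returns raw class scores rather than probabilities. A tiny illustration (the tensors here are made up for this example):

logits = torch.tensor([[2.0, 0.5, -1.0]])    # raw scores for one sample, 3 classes
target = torch.tensor([0])                   # true class index
print(nn.CrossEntropyLoss()(logits, target)) # log-softmax + NLL applied internally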
In [10]:
import os

# Number of epochs to train the model
n_epochs = 50
# Make sure the checkpoint directory exists
os.makedirs('./models', exist_ok=True)
# Initialize the minimum validation loss
valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0
for epoch in range(1, n_epochs + 1):
    # Keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    # Train the model
    model.train()
    for data, target in train_loader:
        # Move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # Clear the gradients of all optimized variables
        optimizer.zero_grad()
        # Forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # Calculate the batch loss
        loss = criterion(output, target)
        # Backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # Perform a single optimization step (parameter update)
        optimizer.step()
        # Update training loss
        train_loss += loss.item() * data.size(0)
    # Validate the model
    model.eval()
    with torch.no_grad():  # no gradients needed for validation
        for data, target in valid_loader:
            # Move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # Forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # Calculate the batch loss
            loss = criterion(output, target)
            # Update average validation loss
            valid_loss += loss.item() * data.size(0)
    # Calculate average losses; divide by the sampler lengths, since each
    # loader only sees its subset of the training data, not the full dataset
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(valid_loader.sampler)
    # Print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
    # Save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
            valid_loss_min,
            valid_loss))
        torch.save(model.state_dict(), './models/model_cifar.pth')
        valid_loss_min = valid_loss
In [11]:
model.load_state_dict(torch.load('./models/model_cifar.pth'))
In [12]:
# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
model.eval()
# iterate over test data
for data, target in test_loader:
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        data, target = data.cuda(), target.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    # calculate the batch loss
    loss = criterion(output, target)
    # update test loss
    test_loss += loss.item() * data.size(0)
    # convert output scores (logits) to predicted class
    _, pred = torch.max(output, 1)
    # compare predictions to true label
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
    # calculate test accuracy for each object class
    for i in range(len(target)):  # use len(target) in case the last batch is smaller
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1
# average test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))
for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))
Answer:
1) Collect more data / augment the data / balance the data (a minimal augmentation sketch is shown below)
2) Test different hyperparameters / optimizers
3) Try a different architecture
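For point 1, a common first step is to augment only the training set with random transforms (a sketch, assuming the same normalization as above; the flip and padded crop are illustrative choices, not the notebook's actual setup):

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),     # mirror images half the time
    transforms.RandomCrop(32, padding=4),  # jitter position with padded random crops
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])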
In [13]:
# obtain one batch of test images
dataiter = iter(test_loader)
images, labels = next(dataiter)  # dataiter.next() was removed in newer PyTorch
# move model inputs to cuda, if GPU available
if train_on_gpu:
    images = images.cuda()
# get sample outputs
output = model(images)
# convert output scores (logits) to predicted class
_, preds_tensor = torch.max(output, 1)
preds = np.squeeze(preds_tensor.numpy()) if not train_on_gpu else np.squeeze(preds_tensor.cpu().numpy())
# move images back to the CPU and convert to numpy for display
images = images.cpu().numpy()
# plot the images in the batch, along with predicted and true labels
fig = plt.figure(figsize=(25, 4))
for idx in np.arange(20):
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])
    imshow(images[idx])
    ax.set_title("{} ({})".format(classes[preds[idx]], classes[labels[idx]]),
                 color=("green" if preds[idx] == labels[idx].item() else "red"))